Differing Water Quality in Protected and Unprotected U.S. Southwest River Basins

Author
Affiliation

Samantha Nauman, Madison Schartz, Hanna Velicer

Colorado State University

Other Formats

Drafting the Introduction, Background, and Motivation for the Project

BLAH

Exploring the Data

Code
library(tidyverse)
Code
library(dataRetrieval)
library(ggplot2)
library(plotly)
Code
library(lubridate)
library(ggpubr)
library(plotly)
library(dplyr)
library(tsibble)
Code
  # CO unprotected 1
QCO_unpro1=readNWISdv(
  siteNumber= c('09034250'),
  parameterCd= c('00060'),
  startDate='1975-10-01', #beginning of wateryear 1976
  endDate='2024-10-01') %>%
  renameNWISColumns() %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(Flow = mean(Flow, na.rm = TRUE), .groups = "drop")

pHWT_COunpro1 <- read.csv(
  "https://raw.githubusercontent.com/snauman4/ESS330_FinalProject/main/CO_dataunprotected1.csv",
  stringsAsFactors = FALSE
)

# 2. Compute daily means of Temp and pH
DailypW_COunpro1 <- pHWT_COunpro1 %>%
  # parse your Date
  mutate(Date = mdy(Date)) %>%
  # drop the qualifier columns and coerce the true max/min to numeric
  mutate(across(
    c(Temp..max., Temp..min., pH..max., pH..min.),
    ~ as.numeric(.)
  )) %>%
  # compute a rowwise mean of temp and of pH
  mutate(
    Temp_mean = rowMeans(select(., Temp..max., Temp..min.), na.rm = TRUE),
    pH_mean   = rowMeans(select(., pH..max.,    pH..min.),    na.rm = TRUE)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(Temp_mean, na.rm = TRUE),
    `pH (mean)` = mean(pH_mean,   na.rm = TRUE),
    .groups = "drop"
  )
Code
# 3. Roll those daily means up to monthly to line up with your flow data
Month_COunpro1 <- DailypW_COunpro1 %>%
  mutate(
    Date = yearmonth(Date)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(`Temp (C)`,  na.rm = TRUE),
    `pH (mean)` = mean(`pH (mean)`, na.rm = TRUE),
    .groups = "drop"
  )

# make sure site_no is character in both data sets
QCO_unpro1 <- QCO_unpro1 %>%
  mutate(
    site_no = as.character(site_no),        # ensure it’s a string
    Date    = yearmonth(Date)               # ensure it’s a yearmonth
  )

Month_COunpro1 <- Month_COunpro1 %>%
  mutate(
    site_no = str_pad(as.character(site_no), width = 8, pad = "0"),
       # pad out to 8 digits with leading zeros
    Date    = yearmonth(Date)
  )

# now the join will work
COdata_unprotected1 <- QCO_unpro1 %>%
  left_join(Month_COunpro1, by = c("site_no","Date")) %>%
  select(site_no, Date, Flow, `Temp (C)`, `pH (mean)`)

  # CO unprotected 2
QCO_unpro2=readNWISdv(
  siteNumber= c('09147500'),
  parameterCd= c('00060'),
  startDate='1975-10-01', #beginning of wateryear 1976
  endDate='2024-10-01') %>%
  renameNWISColumns() %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(Flow = mean(Flow, na.rm = TRUE), .groups = "drop")

pHWT_COunpro2 <- read.csv(
  "https://raw.githubusercontent.com/snauman4/ESS330_FinalProject/main/CO_dataunprotected2.csv",
  stringsAsFactors = FALSE
)

# 2. Compute daily means of Temp and pH
DailypW_COunpro2 <- pHWT_COunpro2 %>%
  # parse your Date
  mutate(Date = mdy(Date)) %>%
  # drop the qualifier columns and coerce the true max/min to numeric
  mutate(across(
    c(Temp..max., Temp..min.),
    ~ as.numeric(.)
  )) %>%
  # compute a rowwise mean of temp and of pH
  mutate(
    Temp_mean = rowMeans(select(., Temp..max., Temp..min.), na.rm = TRUE)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(Temp_mean, na.rm = TRUE),
    .groups = "drop"
  )
Code
# 3. Roll those daily means up to monthly to line up with your flow data
Month_COunpro2 <- DailypW_COunpro2 %>%
  mutate(
    Date = yearmonth(Date)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(`Temp (C)`,  na.rm = TRUE),
    .groups = "drop"
  )

# make sure site_no is character in both data sets
QCO_unpro2 <- QCO_unpro2 %>%
  mutate(
    site_no = as.character(site_no),        # ensure it’s a string
    Date    = yearmonth(Date)               # ensure it’s a yearmonth
  )

Month_COunpro2 <- Month_COunpro2 %>%
  mutate(
    site_no = str_pad(as.character(site_no), width = 8, pad = "0"),
       # pad out to 8 digits with leading zeros
    Date    = yearmonth(Date)
  )

# now the join will work
COdata_unprotected2 <- QCO_unpro2 %>%
  left_join(Month_COunpro2, by = c("site_no","Date")) %>%
  select(site_no, Date, Flow, `Temp (C)`)

  # CA unprotected 
QCA_unpro=readNWISdv(
  siteNumber= c('11074000'),
  parameterCd= c('00060'),
  startDate='1975-10-01', #beginning of wateryear 1976
  endDate='2024-10-01') %>%
  renameNWISColumns() %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(Flow = mean(Flow, na.rm = TRUE), .groups = "drop")

pHWT_CAunpro <- read.csv(
  "https://raw.githubusercontent.com/snauman4/ESS330_FinalProject/main/CA_dataunprotected.csv",
  stringsAsFactors = FALSE
)

# 2. Compute daily means of Temp and pH
DailypW_CAunpro <- pHWT_CAunpro %>%
  # parse your Date
  mutate(Date = mdy(Date)) %>%
  # drop the qualifier columns and coerce the true max/min to numeric
  mutate(across(
    c(Temp..max., Temp..min.),
    ~ as.numeric(.)
  )) %>%
  # compute a rowwise mean of temp and of pH
  mutate(
    Temp_mean = rowMeans(select(., Temp..max., Temp..min.), na.rm = TRUE)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(Temp_mean, na.rm = TRUE),
    .groups = "drop"
  )

# 3. Roll those daily means up to monthly to line up with your flow data
Month_CAunpro <- DailypW_CAunpro %>%
  mutate(
    Date = yearmonth(Date)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(`Temp (C)`,  na.rm = TRUE),
    .groups = "drop"
  )

# make sure site_no is character in both data sets
QCA_unpro <- QCA_unpro %>%
  mutate(
    site_no = as.character(site_no),        # ensure it’s a string
    Date    = yearmonth(Date)               # ensure it’s a yearmonth
  )

Month_CAunpro <- Month_CAunpro %>%
  mutate(
    site_no = str_pad(as.character(site_no), width = 8, pad = "0"),
    Date    = yearmonth(Date))

CAdata_unprotected <- QCA_unpro %>%
  left_join(Month_CAunpro, by = c("site_no","Date")) %>%
  select(site_no, Date, Flow, `Temp (C)`)

  # AZ unprotected 
AZunproraw <- read.csv(
  "https://raw.githubusercontent.com/madi-schartz/ESS330-FINAL/refs/heads/master/AZdata_unprotected.csv",
  stringsAsFactors = FALSE
)

# add pH
DailypW_AZunpro <- AZunproraw %>%
  mutate(Date = mdy(Date)) %>% 
  mutate(across(
    c(Temp..mean.),
    ~ as.numeric(.)
  )) %>%
  group_by(site_no, Date) %>%
  summarise(
    Flow       = mean(Discharge..Mean., na.rm = TRUE),
    `Temp (C)` = mean(Temp..mean.,        na.rm = TRUE),
    .groups    = "drop"
  )

AZdata_unprotected <- DailypW_AZunpro %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(
    Flow       = mean(Flow,       na.rm = TRUE),
    `Temp (C)` = mean(`Temp (C)`, na.rm = TRUE),
    .groups    = "drop"
  )

  # UT unprotected
UTunproraw <- read.csv(
  "https://raw.githubusercontent.com/madi-schartz/ESS330-FINAL/refs/heads/master/UTdata_unprotected.csv",
  stringsAsFactors = FALSE
)

DailypW_UTunpro <- UTunproraw %>%
  mutate(Date = mdy(Date)) %>% 
  mutate(across(
    c(Discharge,`Temp..Min.`,`Temp..Max.`, `pH..Max.`, `pH..Min.`),
    ~ as.numeric(.)
  )) %>%
  mutate(
    Temp_mean = rowMeans(select(., Temp..Max.,  Temp..Min.), na.rm = TRUE),
    pH_mean   = rowMeans(select(., pH..Max.,     pH..Min.),    na.rm = TRUE)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    Flow       = mean(Discharge, na.rm = TRUE),
    `Temp (C)` = mean(Temp_mean,        na.rm = TRUE),
    `pH (mean)`= mean(pH_mean,          na.rm = TRUE),
    .groups    = "drop"
  )
Code
# 3. roll up to monthly
UTdata_unprotected <- DailypW_UTunpro %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(
    Flow       = mean(Flow,       na.rm = TRUE),
    `Temp (C)` = mean(`Temp (C)`, na.rm = TRUE),
    `pH (mean)`= mean(`pH (mean)`,na.rm = TRUE),
    .groups    = "drop"
  )
Source: Article Notebook
Code
head(COdata_unprotected1)
# A tibble: 6 × 5
  site_no      Date  Flow `Temp (C)` `pH (mean)`
  <chr>       <mth> <dbl>      <dbl>       <dbl>
1 09034250 1981 Oct  59.9         NA          NA
2 09034250 1981 Nov  76.5         NA          NA
3 09034250 1981 Dec  64.3         NA          NA
4 09034250 1982 Jan  65.5         NA          NA
5 09034250 1982 Feb  63.5         NA          NA
6 09034250 1982 Mar  84.6         NA          NA
Code
head(COdata_unprotected2)
# A tibble: 6 × 4
  site_no      Date  Flow `Temp (C)`
  <chr>       <mth> <dbl>      <dbl>
1 09147500 1975 Oct  81.6         NA
2 09147500 1975 Nov 118.          NA
3 09147500 1975 Dec 110.          NA
4 09147500 1976 Jan  72.3         NA
5 09147500 1976 Feb  81.4         NA
6 09147500 1976 Mar  90.6         NA
Code
head(CAdata_unprotected)
# A tibble: 6 × 4
  site_no      Date  Flow `Temp (C)`
  <chr>       <mth> <dbl>      <dbl>
1 11074000 1975 Oct  86.4      18.3 
2 11074000 1975 Nov 176.       12.7 
3 11074000 1975 Dec 278.       10.3 
4 11074000 1976 Jan 293.        9.62
5 11074000 1976 Feb 241.       12.9 
6 11074000 1976 Mar 234.       13.4 
Code
head(UTdata_unprotected)
# A tibble: 6 × 5
  site_no     Date   Flow `Temp (C)` `pH (mean)`
    <int>    <mth>  <dbl>      <dbl>       <dbl>
1 9315000 1975 Apr  4973.        NaN         NaN
2 9315000 1975 May 10606.        NaN         NaN
3 9315000 1975 Jun 21057.        NaN         NaN
4 9315000 1975 Jul 14614.        NaN         NaN
5 9315000 1975 Aug  5173.        NaN         NaN
6 9315000 1975 Sep  2872         NaN         NaN
Code
  # CO protected 1
QCO_pro1=readNWISdv(
  siteNumber= c('06752260'),
  parameterCd= c('00060'),
  startDate='1975-10-01', #beginning of wateryear 1976
  endDate='2024-10-01') %>%
  renameNWISColumns() %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(Flow = mean(Flow, na.rm = TRUE), .groups = "drop")

pHWT_COpro1 <- read.csv(
  "https://raw.githubusercontent.com/snauman4/ESS330_FinalProject/main/CO_dataprotected1.csv",
  stringsAsFactors = FALSE
)

DailypW_COpro1 <- pHWT_COpro1 %>%
  mutate(Date = mdy(Date)) %>%
  mutate(across(
    c(Temp..max., Temp..min.),
    ~ as.numeric(.))) %>%
  mutate(
    Temp_mean = rowMeans(select(., Temp..max., Temp..min.), na.rm = TRUE),
    pH_mean = rowMeans(select(., pH..max., pH..min.), na.rm = TRUE)) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(Temp_mean, na.rm = TRUE),
    `pH (mean)` = mean(pH_mean, na.rm = TRUE),
    .groups = "drop")

Month_COpro1 <- DailypW_COpro1 %>%
  mutate(
    Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(`Temp (C)`,  na.rm = TRUE),
    `pH (mean)` = mean(`pH (mean)`, na.rm = TRUE),
    .groups = "drop")

QCO_pro1 <- QCO_pro1 %>%
  mutate(
    site_no = as.character(site_no),        
    Date    = yearmonth(Date))

Month_COpro1 <- Month_COpro1 %>%
  mutate(
    site_no = str_pad(as.character(site_no), width = 8, pad = "0"),
    Date    = yearmonth(Date))

COdata_protected1 <- QCO_pro1 %>%
  left_join(Month_COpro1, by = c("site_no","Date")) %>%
  select(site_no, Date, Flow, `Temp (C)`, `pH (mean)`)

  # CO protected 2
QCO_pro2=readNWISdv(
  siteNumber= c('09163500'),
  parameterCd= c('00060'),
  startDate='1975-10-01', #beginning of wateryear 1976
  endDate='2024-10-01') %>%
  renameNWISColumns() %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(Flow = mean(Flow, na.rm = TRUE), .groups = "drop")

pHWT_COpro2 <- read.csv(
  "https://raw.githubusercontent.com/snauman4/ESS330_FinalProject/main/CO_dataprotected2.csv",
  stringsAsFactors = FALSE
)

DailypW_COpro2 <- pHWT_COpro2 %>%
  mutate(Date = mdy(Date)) %>%
  mutate(across(
    c(Temp..max., Temp..min.),
    ~ as.numeric(.))) %>%
  mutate(
    Temp_mean = rowMeans(select(., Temp..max., Temp..min.), na.rm = TRUE),
    pH_mean = rowMeans(select(., pH..max., pH..min.), na.rm = TRUE)) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(Temp_mean, na.rm = TRUE),
    `pH (mean)` = mean(pH_mean, na.rm = TRUE),
    .groups = "drop")

Month_COpro2 <- DailypW_COpro2 %>%
  mutate(
    Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(`Temp (C)`,  na.rm = TRUE),
    `pH (mean)` = mean(`pH (mean)`, na.rm = TRUE),
    .groups = "drop")

QCO_pro2 <- QCO_pro2 %>%
  mutate(
    site_no = as.character(site_no),       
    Date    = yearmonth(Date))

Month_COpro2 <- Month_COpro2 %>%
  mutate(
    site_no = str_pad(as.character(site_no), width = 8, pad = "0"),
    Date    = yearmonth(Date))

COdata_protected2 <- QCO_pro2 %>%
  left_join(Month_COpro2, by = c("site_no","Date")) %>%
  select(site_no, Date, Flow, `Temp (C)`, `pH (mean)`)

  # CA protected
QCA_pro=readNWISdv(
  siteNumber= c('11044000'),
  parameterCd= c('00060'),
  startDate='1975-10-01', #beginning of wateryear 1976
  endDate='2024-10-01') %>%
  renameNWISColumns() %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(Flow = mean(Flow, na.rm = TRUE), .groups = "drop")

pHWT_CApro <- read.csv(
  "https://raw.githubusercontent.com/snauman4/ESS330_FinalProject/main/CA_dataprotected.csv",
  stringsAsFactors = FALSE
)

DailypW_CApro <- pHWT_CApro %>%
  mutate(Date = mdy(Date)) %>%
  mutate(across(
    c(Temp..max., Temp..min.,`pH..max.`, `pH..min.`),
    ~ as.numeric(.),
    .names = "{.col}")
  ) %>%
  mutate(
    Temp_mean = rowMeans(select(., Temp..max., Temp..min.), na.rm = TRUE),
    pH_mean   = rowMeans(select(., pH..max.,    pH..min.),    na.rm = TRUE)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(Temp_mean, na.rm = TRUE),
    `pH (mean)` = mean(pH_mean, na.rm = TRUE),
    .groups = "drop"
  )
Code
Month_CApro <- DailypW_CApro %>%
  mutate(
    Date = yearmonth(Date)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    `Temp (C)`  = mean(`Temp (C)`,  na.rm = TRUE),
    `pH (mean)` = mean(`pH (mean)`, na.rm = TRUE),
    .groups = "drop"
  )

QCA_pro <- QCA_pro %>%
  mutate(
    site_no = as.character(site_no),       
    Date    = yearmonth(Date)               
  )

Month_CApro <- Month_CApro %>%
  mutate(
    site_no = str_pad(as.character(site_no), width = 8, pad = "0"),
    Date    = yearmonth(Date)
  )

CAdata_protected <- QCA_pro %>%
  left_join(Month_CApro, by = c("site_no","Date")) %>%
  select(site_no, Date, Flow, `Temp (C)`, `pH (mean)`)

  # AZ data 
AZraw <- read.csv(
  "https://raw.githubusercontent.com/hvelicer/ESS330_finalproject/refs/heads/main/AZdata_protected.csv",
  stringsAsFactors = FALSE
)

DailypW_AZpro <- AZraw %>%
  mutate(Date = mdy(Date)) %>% 
  mutate(across(
    c(Temp..max., Temp..min., `pH..max.`, `pH..min.`),
    ~ as.numeric(.)
  )) %>%
  mutate(
    Temp_mean = rowMeans(select(., Temp..max.,  Temp..min.), na.rm = TRUE),
    pH_mean   = rowMeans(select(., pH..max.,     pH..min.),    na.rm = TRUE)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    Flow       = mean(Discharge..mean., na.rm = TRUE),
    `Temp (C)` = mean(Temp..mean.,        na.rm = TRUE),
    `pH (mean)`= mean(pH_mean,          na.rm = TRUE),
    .groups    = "drop"
  )

AZdata_protected <- DailypW_AZpro %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(
    Flow       = mean(Flow,       na.rm = TRUE),
    `Temp (C)` = mean(`Temp (C)`, na.rm = TRUE),
    `pH (mean)`= mean(`pH (mean)`,na.rm = TRUE),
    .groups    = "drop"
  )

  # UT data
UTraw <- read.csv(
  "https://raw.githubusercontent.com/hvelicer/ESS330_finalproject/main/UTdata_protected.csv",
  stringsAsFactors = FALSE
)

DailypW_UTpro <- UTraw %>%
  mutate(Date = mdy(Date)) %>% 
  mutate(across(
    c(Temp..max., Temp..min., `pH..max.`, `pH..min.`),
    ~ as.numeric(.)
  )) %>%
  mutate(
    Temp_mean = rowMeans(select(., Temp..max.,  Temp..min.), na.rm = TRUE),
    pH_mean   = rowMeans(select(., pH..max.,     pH..min.),    na.rm = TRUE)
  ) %>%
  group_by(site_no, Date) %>%
  summarise(
    Flow       = mean(Discharge..mean., na.rm = TRUE),
    `Temp (C)` = mean(Temp_mean,        na.rm = TRUE),
    `pH (mean)`= mean(pH_mean,          na.rm = TRUE),
    .groups    = "drop"
  )

UTdata_protected <- DailypW_UTpro %>%
  mutate(Date = yearmonth(Date)) %>%
  group_by(site_no, Date) %>%
  summarise(
    Flow       = mean(Flow,       na.rm = TRUE),
    `Temp (C)` = mean(`Temp (C)`, na.rm = TRUE),
    `pH (mean)`= mean(`pH (mean)`,na.rm = TRUE),
    .groups    = "drop"
  )
Code
head(COdata_protected1)
# A tibble: 6 × 5
  site_no      Date  Flow `Temp (C)` `pH (mean)`
  <chr>       <mth> <dbl>      <dbl>       <dbl>
1 06752260 1975 Oct  7.23         NA          NA
2 06752260 1975 Nov  4.47         NA          NA
3 06752260 1975 Dec  4.09         NA          NA
4 06752260 1976 Jan  3.46         NA          NA
5 06752260 1976 Feb  3.51         NA          NA
6 06752260 1976 Mar  2.91         NA          NA
Code
head(COdata_protected2)
# A tibble: 6 × 5
  site_no      Date  Flow `Temp (C)` `pH (mean)`
  <chr>       <mth> <dbl>      <dbl>       <dbl>
1 09163500 1975 Oct 3945.         NA          NA
2 09163500 1975 Nov 4814.         NA          NA
3 09163500 1975 Dec 4620          NA          NA
4 09163500 1976 Jan 4000.         NA          NA
5 09163500 1976 Feb 3768.         NA          NA
6 09163500 1976 Mar 3658.         NA          NA
Code
head(CAdata_protected)
# A tibble: 6 × 5
  site_no      Date  Flow `Temp (C)` `pH (mean)`
  <chr>       <mth> <dbl>      <dbl>       <dbl>
1 11044000 1975 Oct  2.03         NA          NA
2 11044000 1975 Nov  2.19         NA          NA
3 11044000 1975 Dec  2.37         NA          NA
4 11044000 1976 Jan  2.35         NA          NA
5 11044000 1976 Feb 10.2          NA          NA
6 11044000 1976 Mar  5.79         NA          NA
Code
head(AZdata_protected)
# A tibble: 6 × 5
  site_no     Date  Flow `Temp (C)` `pH (mean)`
    <int>    <mth> <dbl>      <dbl>       <dbl>
1 9510000 1975 Oct  239.        NaN         NaN
2 9510000 1975 Nov  517.        NaN         NaN
3 9510000 1975 Dec  433.        NaN         NaN
4 9510000 1976 Jan  351.        NaN         NaN
5 9510000 1976 Feb  541.        NaN         NaN
6 9510000 1976 Mar 1105.        NaN         NaN
Code
head(UTdata_protected)
# A tibble: 6 × 5
  site_no     Date  Flow `Temp (C)` `pH (mean)`
    <int>    <mth> <dbl>      <dbl>       <dbl>
1 9261000 1975 Oct 1872.        NaN         NaN
2 9261000 1975 Nov 2643.        NaN         NaN
3 9261000 1975 Dec 4001.        NaN         NaN
4 9261000 1976 Jan 3462.        NaN         NaN
5 9261000 1976 Feb 2824.        NaN         NaN
6 9261000 1976 Mar 3118.        NaN         NaN
Code
# 1) bind & tag
unprotected_all <- bind_rows(
  COdata_unprotected1 %>% mutate(site_no = as.character(site_no)),
  COdata_unprotected2 %>% mutate(site_no = as.character(site_no)),
  CAdata_unprotected   %>% mutate(site_no = as.character(site_no)),
  AZdata_unprotected   %>% mutate(site_no = as.character(site_no)),
  UTdata_unprotected   %>% mutate(site_no = as.character(site_no))
) %>%
  mutate(protection = "Unprotected")

protected_all <- bind_rows(
  COdata_protected1 %>% mutate(site_no = as.character(site_no)),
  COdata_protected2 %>% mutate(site_no = as.character(site_no)),
  CAdata_protected  %>% mutate(site_no = as.character(site_no)),
  AZdata_protected  %>% mutate(site_no = as.character(site_no)),
  UTdata_protected  %>% mutate(site_no = as.character(site_no))
) %>%
  mutate(protection = "Protected")

# 2) factorize **after** sub‐setting (so you only get the levels you actually have)
unprotected_all <- unprotected_all %>% mutate(site_no = factor(site_no))
protected_all   <- protected_all   %>% mutate(site_no = factor(site_no))

# 3) helper
make_panel <- function(df, yvar, title) {
  ggplot(df, aes(x = Date, y = .data[[yvar]], colour = site_no)) +
    geom_line(size = 0.3,   na.rm = TRUE) +
    geom_point(size = 0.8,  na.rm = TRUE) +
    labs(title = title, x = NULL, y = NULL, colour = "Site") +
    theme_minimal()
}

# 4) build
p_flow_un <- make_panel(unprotected_all, "Flow",      "Unprotected Flow")
Code
ggplotly(p_flow_un)
Code
p_temp_un <- make_panel(unprotected_all, "Temp (C)",  "Unprotected Temp (C)")
ggplotly(p_temp_un)
Code
p_ph_un   <- make_panel(unprotected_all, "pH (mean)", "Unprotected pH (mean)")
ggplotly(p_ph_un)
Code
p_flow_pr <- make_panel(protected_all,   "Flow",      "Protected Flow")
ggplotly(p_flow_pr)
Code
p_temp_pr <- make_panel(protected_all,   "Temp (C)",  "Protected Temp (C)")
ggplotly(p_temp_pr)
Code
p_ph_pr   <- make_panel(protected_all,   "pH (mean)", "Protected pH (mean)")
ggplotly(p_ph_pr)
Code
# 5) stack
Yearly_un <- ggarrange(
  p_flow_un, p_temp_un, p_ph_un,
  ncol = 1, nrow = 3,
  common.legend = TRUE, legend = "bottom"
)

Yearly_pr <- ggarrange(
  p_flow_pr, p_temp_pr, p_ph_pr,
  ncol = 1, nrow = 3,
  common.legend = TRUE, legend = "bottom"
)

Yearly_both <- ggarrange(
  Yearly_un, Yearly_pr,
  ncol   = 2, widths = c(1,1),
  label.x = c(0.05, 0.55),
  label.y = 1.02
)

print(Yearly_both)

Code
# Looking at seasonal data
library(tsibble)
library(feasts)
Code
library(plotly)

# turn your data into a tsibble
ts_unprotected <- unprotected_all %>%
  as_tsibble(key = site_no, index = Date)
ts_protected <- protected_all %>% 
  as_tsibble(key = site_no, index = Date)

# 1) Unprotected
p_sub_flow_un <- ts_unprotected %>% 
  gg_subseries(Flow) +
  labs(title="Unprotected Flow Subseries")
p_sub_temp_un <- ts_unprotected %>% 
  gg_subseries(`Temp (C)`) +
  labs(title="Unprotected Temp (C) Subseries")
p_sub_ph_un <- ts_unprotected %>% 
  gg_subseries(`pH (mean)`) +
  labs(title="Unprotected pH (mean) Subseries")

# 2) Protected 
p_sub_flow_pr <- ts_protected %>% 
  gg_subseries(Flow) +
  labs(title="Protected Flow Subseries")
p_sub_temp_pr <- ts_protected %>% 
  gg_subseries(`Temp (C)`) +
  labs(title="Protected Temp (C) Subseries")
p_sub_ph_pr <- ts_protected %>% 
  gg_subseries(`pH (mean)`) +
  labs(title="Protected pH (mean) Subseries")

ggplotly(p_sub_flow_un)
Code
ggplotly(p_sub_temp_un)
Code
ggplotly(p_sub_ph_un)
Code
ggplotly(p_sub_flow_pr)
Code
ggplotly(p_sub_temp_pr)
Code
ggplotly(p_sub_ph_pr)